############################################################
# Did the experiment work?
# 
# Author: 
# Soubhik Barari
# 
# Environment:
# - must use R 3.6
# 
# Input:
# - 00-read_clean_data.R
#
# Output:
# - fig/causal*{.png,.pdf}
############################################################

source("00-read_clean_data.R")

#----------------------------------------------------------
# Compliance with treatments?
#----------------------------------------------------------

c_group <- responses_df$treatment == "control"
t1_group <- responses_df$treatment == "one"
t1_group_compliers <- t1_group & responses_df$Reflection != ""
t2_group <- responses_df$treatment == "two"
t2_group_compliers <- t2_group & responses_df$T2 != ""

cat("\ncompliance w/treatment 1 (reflected):\n")
cat(sum(t1_group_compliers)/sum(t1_group))
# 0.9760148

cat("\ncompliance w/treatment 2 (listed a loved one):\n")
cat(sum(t2_group_compliers)/sum(t2_group))
# 0.8894928

#----------------------------------------------------------
# Effect of treatments?
#----------------------------------------------------------

outcome_cols <- c(
	"social",    #cancel social gatherings
	"handshake", #handshakes
	"stores", #close all stores
	"curfew", #general curfew
	"SOB_1",  #perception of social (how many out of 100)
	"SOB_2",  #perception of handshakes 
	"SOB_3",  #perception of close all stores 
	"SOB_4"   #perception of curfew 
)
responses_df$condition <- ifelse(
	c_group, 
	"control\n",
	ifelse(t1_group, 
		"externalities\n+ reflection\n", 
		"externalities\n+ naming\n")
)
#################################
########### ON BELIEFS
#################################
belief_cols <- c(
    "social", "handshake", "stores", "curfew"
)

causal_belief_df <- responses_df[,c(belief_cols, "condition")] %>%
	tidyr::gather("outcome", "value", belief_cols)

causal_belief_df$outcome <- ifelse(
	causal_belief_df$outcome == "social", 
	"Should cancel\nsocial\ngatherings",
	ifelse(
		causal_belief_df$outcome == "handshake", 
		"Should avoid\nhandshakes",
		ifelse(
			causal_belief_df$outcome == "stores", 
			"Should close\nnon-essential\nshops", "Should be\na general\ncurfew"
		)
	)
)
causal_belief_df$outcome <- factor(
    causal_belief_df$outcome,
    levels = c(
        "Should avoid\nhandshakes",
        "Should be\na general\ncurfew",
        "Should cancel\nsocial\ngatherings",
        "Should close\nnon-essential\nshops"
    )
)

causal_belief_viz_df <- causal_belief_df %>%
	group_by(condition, outcome) %>%
	mutate(n_tot=n()) %>%
	group_by(condition, outcome, value) %>%
	summarise(pct=n()/head(n_tot, 1))

causal_belief_viz_df %>%
	mutate(pct=pct*100, condition_n=as.numeric(condition)) %>%
	filter(value == "Yes") %>%
	ggplot(aes(x=outcome, y=pct, fill=condition)) + 
  	geom_bar(stat="identity", color="black", position=position_dodge()) +
    geom_text(aes(label=sprintf("%0.1f", pct), group=condition, x=outcome, y=pct+2),position = position_dodge(width = 1),
    vjust = -0.5, size = 2) +
  	ylab("percentage yes") +
  	xlab(sprintf("belief (n=%i)", nrow(responses_df))) +
  	theme_bw()
ggsave("fig/causal_belief.pdf", width=6, height=3)
ggsave("fig/causal_belief.png", width=6, height=3)

m <- glm(as.factor(social) ~ age_group + past_healthy_behavior + gender + health + treatment, data = responses_df, family=binomial(link="logit"))
summary(m) ##no effect

m <- glm(as.factor(handshake) ~ age_group + past_healthy_behavior + gender + health + treatment, data = responses_df, family=binomial(link="logit"))
summary(m) ##no effect

m <- glm(as.factor(stores) ~ age_group + past_healthy_behavior + gender + health + treatment, data = responses_df, family=binomial(link="logit"))
summary(m) ##no effect

m <- glm(as.factor(curfew) ~ age_group + past_healthy_behavior + gender + health + treatment, data = responses_df, family=binomial(link="logit"))
summary(m) ##no effect

#### heterogeneity by age group?
m <- glm(as.factor(handshake) ~ age_group + past_healthy_behavior + gender + health + treatment:age_group, data = responses_df, family=binomial(link="logit"))
summary(m) ##none really


#### heterogeneity by gender?
m <- glm(as.factor(social) ~ age_group + past_healthy_behavior + gender + health + treatment:gender, data = responses_df, family=binomial(link="logit"))
summary(m) ##none really


#### heterogeneity by past behavior?
table(responses_df$past_healthy_behavior)
# Low Medium   High 
# 43     67   3074 

responses_df$past_healthy_behavior_binary <- ifelse(
    responses_df$past_behavior_index < 73, "BelowAvg", #about the mean
    ifelse(
        responses_df$past_behavior_index >= 73, "AboveAvg", NA))

responses_df$past_healthy_behavior_binary <- factor(responses_df$past_healthy_behavior_binary, levels=c("BelowAvg", "AboveAvg"))

reg_df <- responses_df %>% filter(!is.na(past_healthy_behavior_binary))

m <- glm(as.factor(social) ~ age_group + gender + health + treatment + past_healthy_behavior + treatment:past_healthy_behavior, data = reg_df, family=binomial(link="logit"))
summary(m) ##no effect

m <- glm(as.factor(handshake) ~ age_group + gender + health + past_healthy_behavior + treatment:past_healthy_behavior, data = reg_df, family=binomial(link="logit"))
summary(m) ##no effect

m <- glm(as.factor(stores) ~ age_group + gender + health + past_healthy_behavior + treatment:past_healthy_behavior, data = reg_df, family=binomial(link="logit"))
summary(m) ##no effect

m <- glm(as.factor(curfew) ~ age_group + gender + health + past_healthy_behavior + treatment:past_healthy_behavior, data = reg_df, family=binomial(link="logit"))
summary(m) ##no effect

#################################
########### ON PERCEPTIONS
#################################
perceptions_cols <- c(
    "SOB_1", "SOB_2", "SOB_3", "SOB_4"
)

causal_percept_df <- responses_df[,c(perceptions_cols, "condition")] %>%
	tidyr::gather("outcome", "value", perceptions_cols)

causal_percept_df$outcome <- ifelse(
	causal_percept_df$outcome == "SOB_1", 
	"cancel social?",
	ifelse(
		causal_percept_df$outcome == "SOB_2", 
		"not handshake?",
		ifelse(
			causal_percept_df$outcome == "SOB_3", 
			"close shops?", "curfew?"
		)
	)
)

causal_percept_viz_df <- causal_percept_df %>%
	filter(value != "") %>%
	mutate(n_tot=n(), value=as.numeric(value)) %>%
	group_by(condition, outcome) %>%
	summarise(avg=mean(value), sd=sd(value))

causal_percept_viz_df %>%
	ggplot(aes(x=outcome, y=avg, fill=condition)) + 
  	geom_bar(stat="identity", color="black", position= position_dodge(width=0.9)) +
  	geom_errorbar(aes(ymin = avg-sd, ymax = avg+sd),  position = position_dodge(width=0.9), width = 0.25) +
  	ylab("avg # of compliant Italians perceived") +
  	xlab(sprintf("beliefs perceived of others (n=%i)", nrow(responses_df))) +
  	theme_bw()
ggsave("fig/causal_percept.pdf", width=6, height=3)
ggsave("fig/causal_percept.png", width=6, height=3)

############################################
########### ON FINANCIAL PUNISHMENT (AMT)
############################################
fin_cols <- c(
	"Geldstrafe_1_1", "Geldstrafe_2_1"
)
causal_fin_df <- responses_df[,c(fin_cols, "condition")] %>%
	tidyr::gather("outcome", "value", fin_cols)

causal_fin_df$outcome <- ifelse(
	causal_fin_df$outcome == "Geldstrafe_1_1", "gatherings\npunishment", "going out \nwith coronavirus\npunishment"
)
	
causal_fin_viz_df <- causal_fin_df %>%
	filter(value != "") %>%
	mutate(n_tot=n(), value=as.numeric(value)) %>%
	group_by(condition, outcome) %>%
	summarise(avg=mean(value), sd=sd(value))

causal_fin_viz_df %>%
	ggplot(aes(x=outcome, y=avg, fill=condition)) + 
  	geom_bar(stat="identity", color="black", position= position_dodge(width=0.9)) +
  	geom_errorbar(aes(ymin = avg-sd, ymax = avg+sd),  position = position_dodge(width=0.9), width = 0.25) +
  	ylab("avg financial penalty believed (£)") +
  	xlab(sprintf("risky behavior (n=%i)", nrow(responses_df))) +
  	theme_bw()
ggsave("fig/causal_punish.pdf", width=6, height=3)
ggsave("fig/causal_punish.png", width=6, height=3)

mod_react_govt = polr(perceivedreaction ~ age_years + gender + health + treatment, data = responses_df, Hess=TRUE)
summary(mod_react_govt, digits=3)

#################################
########### ON SKEPTICISM
#################################
pol_cols <- c(
	"perceivedreaction", #reaction of the govt
	"perceivedeffectivnes", #perceived effectivness of social distancing
	"Q23", #reaction of the public
	"Q36", #trust govt to take care of citizens
	"Q37" #factually accurate
)

causal_pol_df <- responses_df[,c(pol_cols, "condition")] %>%
	mutate(perceivedreaction=as.numeric(perceivedreaction),
		perceivedeffectivnes=as.numeric(perceivedeffectivnes),
		Q23=as.numeric(Q23),
		Q36=as.numeric(Q36),
		Q37=as.numeric(Q37)
	) %>%
	tidyr::gather("outcome", "value", pol_cols) %>%
	mutate(outcome = ifelse(
		outcome == "perceivedeffectivnes", "Social distancing\nis effective",
		ifelse(
			outcome == "perceivedreaction", "Govt reaction\nis appropriate",
			ifelse(
				outcome == "Q23", "Public reaction\nis appropriate",
				ifelse(
					outcome == "Q36", "I trust\nthe govt",
					ifelse(
						outcome == "Q37", "Govt\nCOVID-19 info\nis truthful", NA
					))))))

causal_pol_viz_df <- causal_pol_df %>%
	group_by(condition, outcome) %>%
	summarise(
	    y=mean(value), 
	    ymin=mean(value) - (1.96*sd(value)/sqrt(n())),
	    ymax=mean(value) + (1.96*sd(value)/sqrt(n()))
	)
causal_pol_viz_df$outcome <- factor(causal_pol_viz_df$outcome,
                                    levels=c(
                                        "Social distancing\nis effective",
                                        "Govt\nCOVID-19 info\nis truthful",
                                        "I trust\nthe govt",
                                        "Govt reaction\nis appropriate",
                                        "Public reaction\nis appropriate"
                                    ))

causal_pol_viz_df %>%
	ggplot(aes(x=outcome, y=y, fill=condition)) + 
  	geom_bar(stat="identity", color="black", position= position_dodge(width=0.9)) +
  	geom_errorbar(aes(ymin = ymin, ymax = ymax),  position = position_dodge(width=0.9), width = 0) +
    geom_text(aes(label=sprintf("%0.1f", y), group=condition, x=outcome, y=y+0.2),position = position_dodge(width = 1),
                  vjust = -0.5, size = 3) +
  	ylab("avg level (5-point scale)") +
  	xlab(sprintf("belief (n=%i)", nrow(responses_df))) +
  	theme_bw()
ggsave("fig/causal_politics.pdf", width=8, height=4)
ggsave("fig/causal_politics.png", width=8, height=4)


mod_react_govt = polr(perceivedreaction ~ age_group + past_healthy_behavior + gender + health + treatment, data = responses_df, Hess=TRUE)
summary(mod_react_govt, digits=3)
# Call:
#     polr(formula = perceivedreaction ~ age_group + past_healthy_behavior + 
#              gender + health + treatment, data = responses_df, Hess = TRUE)
# 
# Coefficients:
#     Value Std. Error t value
# age_group30-39              -0.3916     0.1171  -3.345
# age_group40-49              -0.5516     0.1147  -4.809
# age_group50-59              -0.3299     0.1229  -2.684
# age_group60+                -0.5397     0.1347  -4.006
# past_healthy_behaviorMedium -0.4762     0.5208  -0.914
# past_healthy_behaviorHigh   -1.0248     0.4286  -2.391
# genderFemale                -0.2158     0.0734  -2.939
# genderOther                 -0.4300     0.6367  -0.675
# healthFair                  -0.3349     0.2728  -1.227
# healthGood                  -0.1078     0.2656  -0.406
# healthExcellent              0.1312     0.2755   0.476
# treatmentone                 0.0470     0.0888   0.529
# treatmenttwo                 0.0158     0.0885   0.179

mod_eff = polr(perceivedeffectivnes ~ age_years + gender + health + treatment, data = responses_df, Hess=TRUE)
summary(mod_eff, digits=3)
# Call:
#     polr(formula = perceivedeffectivnes ~ age_years + gender + health + 
#              treatment, data = responses_df, Hess = TRUE)
# 
# Coefficients:
#     Value Std. Error t value
# age_years        0.0210    0.00245   8.564
# genderFemale     0.2455    0.06567   3.738
# genderOther     -0.9336    0.51278  -1.821
# healthFair       0.4813    0.24000   2.005
# healthGood       0.7943    0.23385   3.397
# healthExcellent  0.9023    0.24230   3.724
# treatmentone    -0.0377    0.07955  -0.473
# treatmenttwo    -0.0405    0.07962  -0.508

#################################
########### ON LEAVING HOME
#################################

causal_lh_viz_df <- responses_df %>%
    mutate(Q24=ifelse(Q24 == "Yes", 1, 0)) %>%
    group_by(condition) %>%
    summarise(avg=mean(Q24))

causal_lh_viz_df %>%
    ggplot(aes(y=avg, x=condition)) + 
    geom_bar(stat="identity", color="black", position= position_dodge(width=0.9)) +
    ylab("proportion yes") +
    xlab(sprintf("need to leave\nhome this week? (n=%i)", nrow(responses_df))) +
    theme_bw()
ggsave("fig/causal_leavehome.pdf", width=8, height=4)
ggsave("fig/causal_leavehome.png", width=8, height=4)


#################################
########### ON ANXIETY
#################################
anx_cols <- c(
	"anxiety_1", #nervous about current circumstances
	"anxiety_2", #calm/relaxed
	"anxiety_3", #worried about my health
	"anxiety_4", #worried about my family's health
	"anxiety_5" #stressed about leaving my house
)
levels(responses_df$anxiety_2) <- rev(levels(responses_df$anxiety_2))

causal_anx_df <- responses_df[,c(anx_cols, "condition")] %>%
	mutate(anxiety_1=as.numeric(anxiety_1),
		anxiety_2=as.numeric(anxiety_2),
		anxiety_3=as.numeric(anxiety_3),
		anxiety_4=as.numeric(anxiety_4),
		anxiety_5=as.numeric(anxiety_5)
	) %>%
	tidyr::gather("outcome", "value", anx_cols) %>%
	mutate(outcome = ifelse(
		outcome == "anxiety_1", "nervous about\ncurrent times",
		ifelse(
			outcome == "anxiety_2", "calm\n/relaxed",
			ifelse(
				outcome == "anxiety_3", "worried about\nmy health",
				ifelse(
					outcome == "anxiety_4", "worried about\n family's health",
					ifelse(
						outcome == "anxiety_5", "stressed to\nleave house", NA
					))))))

causal_anx_viz_df <- causal_anx_df %>%
	group_by(condition, outcome) %>%
	summarise(avg=mean(value, na.rm=T), sd=sd(value, na.rm=T))

#TODO: visualize overall index

causal_anx_viz_df %>%
	ggplot(aes(x=outcome, y=avg, fill=condition)) + 
  	geom_bar(stat="identity", color="black", position= position_dodge(width=0.9)) +
  	geom_errorbar(aes(ymin = avg-sd, ymax = avg+sd),  position = position_dodge(width=0.9), width = 0.25) +
  	ylab("avg level (5-point scale)") +
  	xlab(sprintf("anxiety question (n=%i)", nrow(responses_df))) +
  	theme_bw()
ggsave("fig/causal_anxiety.pdf", width=8, height=4)
ggsave("fig/causal_anxiety.png", width=8, height=4)

mod_anx <- lm(anxiety_index ~ age_years + gender + health + treatment, responses_df)
summary(mod_anx)
# Call:
# lm(formula = anxiety_index ~ age_years + gender + health + treatment, 
#     data = responses_df)

# Residuals:
#      Min       1Q   Median       3Q      Max 
# -0.53442 -0.09039  0.00575  0.10603  0.33498 

# Coefficients:
#                   Estimate Std. Error t value Pr(>|t|)    
# (Intercept)      0.7446125  0.0228904  32.530  < 2e-16 ***
# age_years       -0.0000739  0.0002062  -0.358  0.72011    
# genderFemale     0.0539907  0.0058504   9.229  < 2e-16 ***
# genderOther     -0.0456979  0.0434228  -1.052  0.29272    
# healthFair      -0.0105790  0.0202426  -0.523  0.60129    
# healthGood      -0.0210832  0.0196469  -1.073  0.28333    
# healthExcellent -0.0669789  0.0204060  -3.282  0.00104 ** 
# treatmentone    -0.0010953  0.0071139  -0.154  0.87765    
# treatmenttwo    -0.0037338  0.0071141  -0.525  0.59974    
# ---
# Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

# Residual standard error: 0.1432 on 2436 degrees of freedom
#   (107 observations deleted due to missingness)
# Multiple R-squared:  0.05426,	Adjusted R-squared:  0.05115 
# F-statistic: 17.47 on 8 and 2436 DF,  p-value: < 2.2e-16

hist(responses_df$anxiety_index[responses_df$gender == "Female"])
hist(responses_df$anxiety_index[responses_df$gender == "Male"])

#################################
########### ON MESSAGE-SHARING
#################################
qcols <- colnames(responses_df)[grepl("Q62", colnames(responses_df))]
causal_sharing_df <- responses_df
causal_sharing_df$nshared <- apply(responses_df[,qcols], 1, function(row) sum(row != ""))
causal_sharing_df$nshared_email <- apply(responses_df[,c("Q62_5","Q62_6","Q62_7","Q62_8","Q62_9")], 1, function(row) sum(row != ""))
causal_sharing_df$nshared_phone <- apply(responses_df[,c("Q62_11","Q62_12","Q62_13","Q62_14","Q62_15")], 1, function(row) sum(row != ""))

causal_sharing_df <- causal_sharing_df[,c("nshared", "nshared_email", "nshared_phone", "condition")] %>%
	tidyr::gather("outcome", "value", c("nshared", "nshared_email", "nshared_phone"))

causal_sharing_viz_bygroup_df <- causal_sharing_df %>%
	group_by(condition, outcome) %>%
	summarise(avg=mean(value), sd=sd(value))

causal_sharing_viz_all_df <- causal_sharing_df %>%
	group_by(outcome) %>%
	summarise(avg=mean(value), sd=sd(value))
causal_sharing_viz_all_df$condition = "all"

causal_sharing_viz_df <- rbind(
	as.data.frame(causal_sharing_viz_bygroup_df[c("condition", "outcome", "avg", "sd")]),  
	as.data.frame(causal_sharing_viz_all_df[c("condition", "outcome", "avg", "sd")])
)

causal_sharing_viz_df$type <- ifelse(
	causal_sharing_viz_df$outcome == "nshared", "email and phone",
	ifelse(
		causal_sharing_viz_df$outcome == "nshared_email", "email", "phone"
	)
)

# causal_sharing_viz_df$condition <- gsub("+ ", "\n + ", causal_sharing_viz_df$condition)
causal_sharing_viz_df %>%
	ggplot(aes(x=condition, y=avg, fill=type)) + 
  	geom_bar(stat="identity", color="black", position= position_dodge(width=0.9)) +
  	geom_errorbar(aes(ymin = avg-sd, ymax = avg+sd),  position = position_dodge(width=0.9), width = 0.25) +
  	ylab("avg # of contacts shared") +
  	xlab(sprintf("treatment condition (n=%i)", nrow(responses_df))) +
  	theme_bw()
ggsave("fig/causal_share.pdf", width=8, height=4)
ggsave("fig/causal_share.png", width=8, height=4)

#----------------------------------------------------------
# What did people reflect about?
#----------------------------------------------------------

ref_df <- responses_df[responses_df$Reflection != "",]
ref <- ref_df$Reflection
##random: most ppl expressing responsibility, some expressing own emotions
sample(ref, 100)

##young ppl: most ppl expressing responsibility, many expressing own emotions
sample(ref_df$Reflection[ref_df$age_years %in% 18:29], 100)

##older ppl: upset that many others are breaking the rules/going out
sample(ref_df$Reflection[ref_df$age_years > 60], 100)

##bad-behavior folks: 
##-1 person said it's not always good to stay home and that 'nature is best healer'
##-other garbage
ref_df <- ref_df[!is.na(ref_df$past_healthy_behavior),]
ref_df$Reflection[ref_df$past_behavior_index < 50]

##extreme-rxn folks:
##-economy (2/10)
##-sick/tired (1/10)
##-exaggeration (2/10)
ref_df$Reflection[ref_df$perceivedreaction == "The reaction is much too extreme"]

##low-trust-in-govt folks: mostly taking it serious, some say govt hasn't done enough
ref_df$Reflection[ref_df$Q36 == "Strongly distrust"]

##low-factual-acc-govt folks
ref_df$Reflection[ref_df$Q37 == "Very untruthful"]

cor(as.numeric(ref_df$Q36), as.numeric(ref_df$Q37))
# [1] 0.6295798

